{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "from io import StringIO\n",
    "\n",
    "import pandas as pd\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.common.desired_capabilities import DesiredCapabilities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configure and launch the Chrome session that every later cell drives.\n",
    "opts = webdriver.ChromeOptions()\n",
    "opts.add_argument('--no-sandbox')  # works around the 'DevToolsActivePort file doesn't exist' start-up error\n",
    "opts.add_argument('--window-size=1920,3000')  # browser window size; Chrome expects WIDTH,HEIGHT (comma-separated)\n",
    "opts.add_argument('--disable-gpu')  # Google's docs recommend this flag to avoid a bug\n",
    "opts.add_argument('--hide-scrollbars')  # hide scrollbars, needed for some special pages\n",
    "# Optional switches, off by default:\n",
    "# opts.add_argument('blink-settings=imagesEnabled=false')  # skip loading images to speed things up\n",
    "# opts.add_argument('--headless')  # no visible window; on Linux without a display Chrome fails to start without it\n",
    "\n",
    "# `options=` replaces the deprecated `chrome_options=` keyword.\n",
    "driver = webdriver.Chrome(options=opts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the CNKI landing page in the browser session.\n",
    "CNKI_HOME_URL = 'https://www.cnki.net/'\n",
    "driver.get(CNKI_HOME_URL)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 登录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'中山大学南...'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the inner HTML of the `Ecp_loginShowName1` element as a quick check of the login state.\n",
    "# `find_element(By.ID, ...)` replaces `find_element_by_id`, which newer Selenium releases removed.\n",
    "driver.find_element(By.ID, \"Ecp_loginShowName1\").get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 高级检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the `highSearch` entry; the next cell switches to the window this opens.\n",
    "# `find_element(By.XPATH, ...)` replaces the removed `find_element_by_xpath` helper.\n",
    "driver.find_element(By.XPATH, '//*[@id=\"highSearch\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['CDwindow-B761C708489070A5B54F60798D338A91', 'CDwindow-3EC107F14352571435EC9ED5E033A27F']\n"
     ]
    }
   ],
   "source": [
    "# List the open window handles, then move the driver's focus to the last one.\n",
    "handles = driver.window_handles\n",
    "print(handles)\n",
    "driver.switch_to.window(handles[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the first item of the first navigation list on the page.\n",
    "# NOTE(review): absolute XPath - breaks as soon as the page layout changes; prefer an id/class locator.\n",
    "element = driver.find_element(By.XPATH, '/html/body/div[3]/div[1]/div/ul[1]/li[1]/a/span')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 专业检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the fourth tab of the search-mode list (presumably the professional-search tab named in the header above - confirm).\n",
    "# NOTE(review): absolute XPath - brittle against layout changes.\n",
    "element = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[2]/ul/li[4]')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 关键词输入并检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace whatever is in the query textarea with the search expression.\n",
    "element = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[2]/div/div[1]/div[1]/div[2]/textarea')\n",
    "element.clear()\n",
    "# NOTE(review): the first term opens with a curly quote but closes with an ASCII double quote;\n",
    "# the string is left unchanged here - confirm the quoting CNKI expects before normalising it.\n",
    "element.send_keys(' SU=“人工智能\" AND SU=“网络”')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the search by clicking the input button next to the query textarea.\n",
    "# NOTE(review): absolute XPath - brittle against layout changes.\n",
    "element = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[2]/div/div[1]/div[1]/div[2]/div[2]/input')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 统计文章数量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1,388'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the hit count from the `countPageDiv` element; the value is a string (e.g. with thousands separators).\n",
    "driver.find_element(By.XPATH, '//*[@id=\"countPageDiv\"]/span[1]/em').get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  更换为显示50篇"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the icon inside `perPageDiv`; the next cell picks an entry from that list.\n",
    "element = driver.find_element(By.XPATH, '//*[@id=\"perPageDiv\"]/div/i')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Choose the entry with data-val 50 in the `perPageDiv` list.\n",
    "# click() returns None, so its result is no longer assigned to `element`.\n",
    "driver.find_element(By.XPATH, '//div[@id=\"perPageDiv\"]//li[@data-val=\"50\"]/a').click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 抓取页面信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>人工智能价值网络下零售企业商业模式创新与企业效益相关性分析</td>\n",
       "      <td>郭漫勤; 师佳英</td>\n",
       "      <td>商业经济研究</td>\n",
       "      <td>2021-06-21</td>\n",
       "      <td>NaN</td>\n",
       "      <td>214.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>大数据时代人工智能在计算机网络技术中的运用</td>\n",
       "      <td>任思颖</td>\n",
       "      <td>科技经济导刊</td>\n",
       "      <td>2021-06-18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>212.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>基于人工智能的网络空间安全防御战略研究</td>\n",
       "      <td>贾焰; 方滨兴; 李爱平; 顾钊铨</td>\n",
       "      <td>中国工程科学</td>\n",
       "      <td>2021-06-15 16:34</td>\n",
       "      <td>NaN</td>\n",
       "      <td>232.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>人工智能赋能网络攻击的安全威胁及应对策略</td>\n",
       "      <td>方滨兴; 时金桥; 王忠儒; 余伟强</td>\n",
       "      <td>中国工程科学</td>\n",
       "      <td>2021-06-15 16:18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>193.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>基于专利技术共现网络的人工智能跨领域融合模式识别  网络首发</td>\n",
       "      <td>陈钰芬; 王科平</td>\n",
       "      <td>情报杂志</td>\n",
       "      <td>2021-06-08 17:53</td>\n",
       "      <td>NaN</td>\n",
       "      <td>161.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>人工智能在计算机网络技术中的应用研究</td>\n",
       "      <td>郑秋泽</td>\n",
       "      <td>中小企业管理与科技(上旬刊)</td>\n",
       "      <td>2021-06-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>602.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>利用人工智能神经网络预测广州市PM2.5日浓度  网络首发</td>\n",
       "      <td>李泽群; 韦骏</td>\n",
       "      <td>北京大学学报(自然科学版)</td>\n",
       "      <td>2021-06-04 14:36</td>\n",
       "      <td>NaN</td>\n",
       "      <td>655.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>人工智能在计算机网络技术中的应用研究</td>\n",
       "      <td>张爱国</td>\n",
       "      <td>现代工业经济和信息化</td>\n",
       "      <td>2021-05-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>40.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>人工智能在计算机网络技术中的应用探讨</td>\n",
       "      <td>马莉</td>\n",
       "      <td>科技创新与应用</td>\n",
       "      <td>2021-05-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>346.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>计算机网络中大数据与人工智能技术的应用</td>\n",
       "      <td>许妙水</td>\n",
       "      <td>计算机与网络</td>\n",
       "      <td>2021-05-26</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>人工智能在计算机网络技术中的应用研究</td>\n",
       "      <td>杨子鸿</td>\n",
       "      <td>网络安全技术与应用</td>\n",
       "      <td>2021-05-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>480.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>人工智能及大数据的网络安全态势感知研究</td>\n",
       "      <td>王晓娜; 李晓宇; 李芙蓉</td>\n",
       "      <td>网络安全技术与应用</td>\n",
       "      <td>2021-05-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>498.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>试论计算机网络发展中的人工智能技术运用</td>\n",
       "      <td>王丽媛</td>\n",
       "      <td>电子世界</td>\n",
       "      <td>2021-05-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>107.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>大数据时代人工智能在计算机网络技术中的应用研究</td>\n",
       "      <td>郁陶</td>\n",
       "      <td>电子世界</td>\n",
       "      <td>2021-05-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>159.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>探析人工智能在计算机网络技术中的应用</td>\n",
       "      <td>程尹乔</td>\n",
       "      <td>电子世界</td>\n",
       "      <td>2021-05-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>101.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>基于大数据时代人工智能在计算机网络技术中的应用</td>\n",
       "      <td>李晓霞</td>\n",
       "      <td>电子测试</td>\n",
       "      <td>2021-05-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>44.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>人工智能在网络运维中的应用</td>\n",
       "      <td>李朝霞; 刘金春; 邢鑫</td>\n",
       "      <td>电子技术与软件工程</td>\n",
       "      <td>2021-05-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>19.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>基于人工智能的网络智能化发展初探</td>\n",
       "      <td>从庆平; 张莉; 刘继平</td>\n",
       "      <td>中国管理信息化</td>\n",
       "      <td>2021-05-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>19</td>\n",
       "      <td>SPR天然产物小分子抑制剂的“人工智能”药物筛选和“网络药理”作用机制研究</td>\n",
       "      <td>艾中柱;王皓南;周珊珊;江经纬;袁胜涛</td>\n",
       "      <td>世界科学技术-中医药现代化</td>\n",
       "      <td>2021-05-14 11:30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>243.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>我国人工智能产业创新网络形成机制研究——基于知识流动的视角</td>\n",
       "      <td>岳圣元</td>\n",
       "      <td>新经济</td>\n",
       "      <td>2021-05-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>21</td>\n",
       "      <td>基于深度卷积神经网络的人工智能在喉鳞状细胞癌窄带成像辅助诊断中的应用</td>\n",
       "      <td>胡蓉;钟琦;徐文;黄志刚;程丽宇</td>\n",
       "      <td>中华耳鼻咽喉头颈外科杂志</td>\n",
       "      <td>2021-05-07 00:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>22</td>\n",
       "      <td>大数据时代下人工智能在计算机网络技术中的运用探讨</td>\n",
       "      <td>杨文学</td>\n",
       "      <td>电脑知识与技术</td>\n",
       "      <td>2021-05-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>30.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>23</td>\n",
       "      <td>人工智能视角下的5G无线网络智能规划和优化</td>\n",
       "      <td>杨燚</td>\n",
       "      <td>现代工业经济和信息化</td>\n",
       "      <td>2021-04-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>104.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>24</td>\n",
       "      <td>“人工智能”赋能英语专业学生网络翻译学习现状与策略</td>\n",
       "      <td>龚骞; 马琳莉; 郭婷; 彭嘉; 易小渤</td>\n",
       "      <td>电脑知识与技术</td>\n",
       "      <td>2021-04-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>103.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>25</td>\n",
       "      <td>大数据时代背景下人工智能在计算机网络技术中的应用探索</td>\n",
       "      <td>戚引松</td>\n",
       "      <td>科技与创新</td>\n",
       "      <td>2021-04-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>397.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>26</td>\n",
       "      <td>大数据时代人工智能在计算机网络技术中的应用</td>\n",
       "      <td>李殿涛</td>\n",
       "      <td>内江科技</td>\n",
       "      <td>2021-04-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>572.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>27</td>\n",
       "      <td>人工智能技术在网络空间安全防御中的实践探究</td>\n",
       "      <td>牛文</td>\n",
       "      <td>无线互联科技</td>\n",
       "      <td>2021-04-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>28</td>\n",
       "      <td>人工智能技术的认知光网络结构分析</td>\n",
       "      <td>牛文</td>\n",
       "      <td>电子世界</td>\n",
       "      <td>2021-04-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>28.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>29</td>\n",
       "      <td>人工智能在输配电网络故障诊断中的应用分析</td>\n",
       "      <td>张成洲; 王瑜; 徐群; 魏可情; 吴若男</td>\n",
       "      <td>电子制作</td>\n",
       "      <td>2021-04-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>33.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>30</td>\n",
       "      <td>人工智能视域下中国网络意识形态建设研究</td>\n",
       "      <td>张茂杰; 樊瑞科; 宋文旭</td>\n",
       "      <td>社科纵横</td>\n",
       "      <td>2021-04-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>31</td>\n",
       "      <td>人工智能在计算机网络技术中的应用</td>\n",
       "      <td>姜华; 艾宪峰</td>\n",
       "      <td>电子技术与软件工程</td>\n",
       "      <td>2021-04-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>39.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>32</td>\n",
       "      <td>人工智能技术在计算机网络技术中的应用</td>\n",
       "      <td>苏晨</td>\n",
       "      <td>南方农机</td>\n",
       "      <td>2021-04-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>344.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>33</td>\n",
       "      <td>基于人工智能的网络安全管理研究</td>\n",
       "      <td>潘永路</td>\n",
       "      <td>网络安全技术与应用</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>230.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>34</td>\n",
       "      <td>计算机网络发展中的人工智能技术应用与研究</td>\n",
       "      <td>宋晶</td>\n",
       "      <td>网络安全技术与应用</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>361.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>35</td>\n",
       "      <td>大数据下人工智能计算机网络技术中的发展探究</td>\n",
       "      <td>郭福燕; 黄稳稳</td>\n",
       "      <td>网络安全技术与应用</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>377.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>36</td>\n",
       "      <td>人工智能在计算机网络技术中的应用</td>\n",
       "      <td>胡博</td>\n",
       "      <td>网络安全技术与应用</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>291.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>37</td>\n",
       "      <td>试析人工智能在计算机网络技术中的运用</td>\n",
       "      <td>王琰</td>\n",
       "      <td>网络安全技术与应用</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>1.0</td>\n",
       "      <td>136.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>38</td>\n",
       "      <td>计算机网络技术中人工智能的应用</td>\n",
       "      <td>王明宽</td>\n",
       "      <td>中阿科技论坛(中英文)</td>\n",
       "      <td>2021-04-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>39</td>\n",
       "      <td>基于专利信息的人工智能技术创新网络图谱研究</td>\n",
       "      <td>赵程程</td>\n",
       "      <td>中国科技论坛</td>\n",
       "      <td>2021-04-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>257.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>40</td>\n",
       "      <td>大数据网络安全防御中人工智能技术的运用</td>\n",
       "      <td>张超; 郑茗泽</td>\n",
       "      <td>中国新通信</td>\n",
       "      <td>2021-04-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>41</td>\n",
       "      <td>基于特征值分布和人工智能的网络入侵检测系统的研究与实现  网络首发</td>\n",
       "      <td>何俊鹏; 罗蕾; 肖堃; 张海涛; 李允</td>\n",
       "      <td>计算机应用研究</td>\n",
       "      <td>2021-04-02 15:40</td>\n",
       "      <td>NaN</td>\n",
       "      <td>381.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>42</td>\n",
       "      <td>计算机网络技术在人工智能的应用</td>\n",
       "      <td>高波</td>\n",
       "      <td>电子技术与软件工程</td>\n",
       "      <td>2021-04-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>25.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>43</td>\n",
       "      <td>大数据时代人工智能在计算机网络技术中的应用研究</td>\n",
       "      <td>雷学智</td>\n",
       "      <td>信息记录材料</td>\n",
       "      <td>2021-04-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>44</td>\n",
       "      <td>人工智能在计算机网络技术中的应用</td>\n",
       "      <td>周公平</td>\n",
       "      <td>信息记录材料</td>\n",
       "      <td>2021-04-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>45</td>\n",
       "      <td>人工智能技术在网络空间安全防御中的应用</td>\n",
       "      <td>郁陶</td>\n",
       "      <td>电子世界</td>\n",
       "      <td>2021-03-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>168.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>46</td>\n",
       "      <td>大数据时代人工智能在计算机网络技术中的应用</td>\n",
       "      <td>杨彦青; 郭献崇</td>\n",
       "      <td>科技风</td>\n",
       "      <td>2021-03-29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>612.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>47</td>\n",
       "      <td>大数据背景下人工智能在计算机网络技术中的应用研究</td>\n",
       "      <td>段冬; 张娴</td>\n",
       "      <td>电脑知识与技术</td>\n",
       "      <td>2021-03-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>111.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>48</td>\n",
       "      <td>基于人工智能的配网网络发令系统及应用</td>\n",
       "      <td>裴俊;李林锐;邹敏佳;董轶杰;郑超</td>\n",
       "      <td>机械与电子</td>\n",
       "      <td>2021-03-24</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>49</td>\n",
       "      <td>基于人工智能的网络舆情大数据传播特征挖掘系统</td>\n",
       "      <td>洪晓艺</td>\n",
       "      <td>电子技术</td>\n",
       "      <td>2021-03-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>80.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>50</td>\n",
       "      <td>人工智能在计算机网络技术中的应用研究</td>\n",
       "      <td>任冬; 张磊; 韩镇阳</td>\n",
       "      <td>中国新通信</td>\n",
       "      <td>2021-03-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>64.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                                     篇名                     作者  \\\n",
       "0            1          人工智能价值网络下零售企业商业模式创新与企业效益相关性分析               郭漫勤; 师佳英   \n",
       "1            2                  大数据时代人工智能在计算机网络技术中的运用                    任思颖   \n",
       "2            3                    基于人工智能的网络空间安全防御战略研究      贾焰; 方滨兴; 李爱平; 顾钊铨   \n",
       "3            4                   人工智能赋能网络攻击的安全威胁及应对策略     方滨兴; 时金桥; 王忠儒; 余伟强   \n",
       "4            5         基于专利技术共现网络的人工智能跨领域融合模式识别  网络首发               陈钰芬; 王科平   \n",
       "5            6                     人工智能在计算机网络技术中的应用研究                    郑秋泽   \n",
       "6            7          利用人工智能神经网络预测广州市PM2.5日浓度  网络首发                李泽群; 韦骏   \n",
       "7            8                     人工智能在计算机网络技术中的应用研究                    张爱国   \n",
       "8            9                     人工智能在计算机网络技术中的应用探讨                     马莉   \n",
       "9           10                    计算机网络中大数据与人工智能技术的应用                    许妙水   \n",
       "10          11                     人工智能在计算机网络技术中的应用研究                    杨子鸿   \n",
       "11          12                    人工智能及大数据的网络安全态势感知研究          王晓娜; 李晓宇; 李芙蓉   \n",
       "12          13                    试论计算机网络发展中的人工智能技术运用                    王丽媛   \n",
       "13          14                大数据时代人工智能在计算机网络技术中的应用研究                     郁陶   \n",
       "14          15                     探析人工智能在计算机网络技术中的应用                    程尹乔   \n",
       "15          16                基于大数据时代人工智能在计算机网络技术中的应用                    李晓霞   \n",
       "16          17                          人工智能在网络运维中的应用           李朝霞; 刘金春; 邢鑫   \n",
       "17          18                       基于人工智能的网络智能化发展初探           从庆平; 张莉; 刘继平   \n",
       "18          19  SPR天然产物小分子抑制剂的“人工智能”药物筛选和“网络药理”作用机制研究    艾中柱;王皓南;周珊珊;江经纬;袁胜涛   \n",
       "19          20          我国人工智能产业创新网络形成机制研究——基于知识流动的视角                    岳圣元   \n",
       "20          21     基于深度卷积神经网络的人工智能在喉鳞状细胞癌窄带成像辅助诊断中的应用       胡蓉;钟琦;徐文;黄志刚;程丽宇   \n",
       "21          22               大数据时代下人工智能在计算机网络技术中的运用探讨                    杨文学   \n",
       "22          23                  人工智能视角下的5G无线网络智能规划和优化                     杨燚   \n",
       "23          24              “人工智能”赋能英语专业学生网络翻译学习现状与策略   龚骞; 马琳莉; 郭婷; 彭嘉; 易小渤   \n",
       "24          25             大数据时代背景下人工智能在计算机网络技术中的应用探索                    戚引松   \n",
       "25          26                  大数据时代人工智能在计算机网络技术中的应用                    李殿涛   \n",
       "26          27                  人工智能技术在网络空间安全防御中的实践探究                     牛文   \n",
       "27          28                       人工智能技术的认知光网络结构分析                     牛文   \n",
       "28          29                   人工智能在输配电网络故障诊断中的应用分析  张成洲; 王瑜; 徐群; 魏可情; 吴若男   \n",
       "29          30                    人工智能视域下中国网络意识形态建设研究          张茂杰; 樊瑞科; 宋文旭   \n",
       "30          31                       人工智能在计算机网络技术中的应用                姜华; 艾宪峰   \n",
       "31          32                     人工智能技术在计算机网络技术中的应用                     苏晨   \n",
       "32          33                        基于人工智能的网络安全管理研究                    潘永路   \n",
       "33          34                   计算机网络发展中的人工智能技术应用与研究                     宋晶   \n",
       "34          35                  大数据下人工智能计算机网络技术中的发展探究               郭福燕; 黄稳稳   \n",
       "35          36                       人工智能在计算机网络技术中的应用                     胡博   \n",
       "36          37                     试析人工智能在计算机网络技术中的运用                     王琰   \n",
       "37          38                        计算机网络技术中人工智能的应用                    王明宽   \n",
       "38          39                  基于专利信息的人工智能技术创新网络图谱研究                    赵程程   \n",
       "39          40                    大数据网络安全防御中人工智能技术的运用                张超; 郑茗泽   \n",
       "40          41      基于特征值分布和人工智能的网络入侵检测系统的研究与实现  网络首发   何俊鹏; 罗蕾; 肖堃; 张海涛; 李允   \n",
       "41          42                        计算机网络技术在人工智能的应用                     高波   \n",
       "42          43                大数据时代人工智能在计算机网络技术中的应用研究                    雷学智   \n",
       "43          44                       人工智能在计算机网络技术中的应用                    周公平   \n",
       "44          45                    人工智能技术在网络空间安全防御中的应用                     郁陶   \n",
       "45          46                  大数据时代人工智能在计算机网络技术中的应用               杨彦青; 郭献崇   \n",
       "46          47               大数据背景下人工智能在计算机网络技术中的应用研究                 段冬; 张娴   \n",
       "47          48                     基于人工智能的配网网络发令系统及应用      裴俊;李林锐;邹敏佳;董轶杰;郑超   \n",
       "48          49                 基于人工智能的网络舆情大数据传播特征挖掘系统                    洪晓艺   \n",
       "49          50                     人工智能在计算机网络技术中的应用研究            任冬; 张磊; 韩镇阳   \n",
       "\n",
       "                刊名              发表时间   被引     下载  操作  \n",
       "0           商业经济研究        2021-06-21  NaN  214.0  下载  \n",
       "1           科技经济导刊        2021-06-18  NaN  212.0  下载  \n",
       "2           中国工程科学  2021-06-15 16:34  NaN  232.0  下载  \n",
       "3           中国工程科学  2021-06-15 16:18  NaN  193.0  下载  \n",
       "4             情报杂志  2021-06-08 17:53  NaN  161.0  下载  \n",
       "5   中小企业管理与科技(上旬刊)        2021-06-05  NaN  602.0  下载  \n",
       "6    北京大学学报(自然科学版)  2021-06-04 14:36  NaN  655.0  下载  \n",
       "7       现代工业经济和信息化        2021-05-30  NaN   40.0  下载  \n",
       "8          科技创新与应用        2021-05-28  NaN  346.0  下载  \n",
       "9           计算机与网络        2021-05-26  NaN   18.0  下载  \n",
       "10       网络安全技术与应用        2021-05-15  NaN  480.0  下载  \n",
       "11       网络安全技术与应用        2021-05-15  NaN  498.0  下载  \n",
       "12            电子世界        2021-05-15  NaN  107.0  下载  \n",
       "13            电子世界        2021-05-15  NaN  159.0  下载  \n",
       "14            电子世界        2021-05-15  NaN  101.0  下载  \n",
       "15            电子测试        2021-05-15  NaN   44.0  下载  \n",
       "16       电子技术与软件工程        2021-05-15  NaN   19.0  下载  \n",
       "17         中国管理信息化        2021-05-15  NaN    4.0  下载  \n",
       "18   世界科学技术-中医药现代化  2021-05-14 11:30  NaN  243.0  下载  \n",
       "19             新经济        2021-05-10  NaN   96.0  下载  \n",
       "20    中华耳鼻咽喉头颈外科杂志  2021-05-07 00:00  NaN    NaN  下载  \n",
       "21         电脑知识与技术        2021-05-05  NaN   30.0  下载  \n",
       "22      现代工业经济和信息化        2021-04-30  NaN  104.0  下载  \n",
       "23         电脑知识与技术        2021-04-25  NaN  103.0  下载  \n",
       "24           科技与创新        2021-04-25  NaN  397.0  下载  \n",
       "25            内江科技        2021-04-25  NaN  572.0  下载  \n",
       "26          无线互联科技        2021-04-25  NaN   43.0  下载  \n",
       "27            电子世界        2021-04-15  NaN   28.0  下载  \n",
       "28            电子制作        2021-04-15  NaN   33.0  下载  \n",
       "29            社科纵横        2021-04-15  NaN   49.0  下载  \n",
       "30       电子技术与软件工程        2021-04-15  NaN   39.0  下载  \n",
       "31            南方农机        2021-04-14  NaN  344.0  下载  \n",
       "32       网络安全技术与应用        2021-04-12  NaN  230.0  下载  \n",
       "33       网络安全技术与应用        2021-04-12  NaN  361.0  下载  \n",
       "34       网络安全技术与应用        2021-04-12  NaN  377.0  下载  \n",
       "35       网络安全技术与应用        2021-04-12  NaN  291.0  下载  \n",
       "36       网络安全技术与应用        2021-04-12  1.0  136.0  下载  \n",
       "37     中阿科技论坛(中英文)        2021-04-10  NaN   37.0  下载  \n",
       "38          中国科技论坛        2021-04-05  NaN  257.0  下载  \n",
       "39           中国新通信        2021-04-05  NaN   43.0  下载  \n",
       "40         计算机应用研究  2021-04-02 15:40  NaN  381.0  下载  \n",
       "41       电子技术与软件工程        2021-04-01  NaN   25.0  下载  \n",
       "42          信息记录材料        2021-04-01  NaN   20.0  下载  \n",
       "43          信息记录材料        2021-04-01  NaN   22.0  下载  \n",
       "44            电子世界        2021-03-30  NaN  168.0  下载  \n",
       "45             科技风        2021-03-29  NaN  612.0  下载  \n",
       "46         电脑知识与技术        2021-03-25  NaN  111.0  下载  \n",
       "47           机械与电子        2021-03-24  NaN   49.0  下载  \n",
       "48            电子技术        2021-03-20  NaN   80.0  下载  \n",
       "49           中国新通信        2021-03-20  NaN   64.0  下载  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "element=driver.find_element_by_id('gridTable')\n",
    "data_html=element.get_attribute('innerHTML')\n",
    "pd.read_html(data_html)[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 爬取文章信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# driver.switch_to.frame()\n",
    "from lxml import etree\n",
    "import re\n",
    "import requests\n",
    "from urllib.parse import urljoin\n",
    "HOST = \"https://kns.cnki.net/\"\n",
    "html = driver.page_source\n",
    "soup = etree.HTML(html)\n",
    "tr_list= soup.xpath('//div[@id=\"gridTable\"]/table/tbody/tr')\n",
    "headers = {\n",
    "    \"Cookie\":'Ecp_ClientId=3200615112602316607; cnkiUserKey=03369524-5653-0508-82c6-2fdbcdb21fe4; RsPerPage=20; _pk_ref=%5B%22%22%2C%22%22%2C1607047617%2C%22https%3A%2F%2Fwww.cnki.net%2F%22%5D; Ecp_ClientIp=202.116.81.140; UM_distinctid=178689abc586d3-0e68ae30691535-5771133-1fa400-178689abc597f5; Ecp_loginuserjf=15014134753; Ecp_session=1; ASP.NET_SessionId=gt3afeursglwft12mavvuvzf; SID_kns8=123111; SID_kns_new=kns123106; CurrSortFieldType=desc; SID_kcms=124103; SID_kxreader_new=011121; Hm_lvt_6e967eb120601ea41b9d312166416aa6=1621474773; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22179876d894d472-0e2dc24144c1fc-2363163-2073600-179876d894e760%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%7D%2C%22%24device_id%22%3A%22179876d894d472-0e2dc24144c1fc-2363163-2073600-179876d894e760%22%7D; Hm_lvt_ba7af201fc75865e9846f701ccb53e6b=1621474773; SID_kns=025123113; SID_klogin=125144; Hm_lpvt_6e967eb120601ea41b9d312166416aa6=1621475961; Hm_lpvt_ba7af201fc75865e9846f701ccb53e6b=1621475961; Ecp_loginuserbk=GDZSDX; knsLeftGroupSelectItem=null5%3B9%3B; CurrSortField=%e8%a2%ab%e5%bc%95%2f(%e8%a2%ab%e5%bc%95%e9%a2%91%e6%ac%a1%2c%27integer%27); _pk_ref=%5B%22%22%2C%22%22%2C1622444666%2C%22https%3A%2F%2Fwww.cnki.net%2F%22%5D; _pk_ses=*; _pk_id=90adc29b-dc70-48ff-8dcc-60e442d5a980.1619271339.5.1622444668.1622444666.; Ecp_LoginStuts={\"IsAutoLogin\":false,\"UserName\":\"GZ0513\",\"ShowName\":\"%e4%b8%ad%e5%b1%b1%e5%a4%a7%e5%ad%a6%e5%8d%97%e6%96%b9%e5%ad%a6%e9%99%a2\",\"UserType\":\"bk\",\"BUserName\":\"\",\"BShowName\":\"\",\"BUserType\":\"\",\"r\":\"4SVIml\"}; LID=WEEvREcwSlJHSldSdmVqM1BLVW9SQVdSTUtBeko5WUtLcE8xUldvbWhxRT0=$9A4hF_YAuvQ5obgVAqNKPCYcEjKensW4IQMovwHtwkF4VYPoHbKxJw!!; c_m_LinID=LinID=WEEvREcwSlJHSldSdmVqM1BLVW9SQVdSTUtBeko5WUtLcE8xUldvbWhxRT0=$9A4hF_YAuvQ5obgVAqNKPCYcEjKensW4IQMovwHtwkF4VYPoHbKxJw!!&ot=05/31/2021 15:24:33; c_m_expire=2021-05-31 15:24:33',\n",
    "    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'\n",
    "}\n",
    "def get_data(href):\n",
    "    DbCode = re.findall(\"DbCode=(.*?)&\",href)[0]\n",
    "    dbname = re.findall(\"dbname=(.*?)&\",href)[0]\n",
    "    filename = re.findall(\"filename=(.*?)&\",href)[0]\n",
    "    href = f\"https://kns.cnki.net/kcms/detail/detail.aspx?dbcode={DbCode}&dbname={dbname}&filename={filename}\"\n",
    "    return href\n",
    "\n",
    "def download(href):\n",
    "    page_source = requests.get(url=href,headers=headers).text\n",
    "    page_source_soup = etree.HTML(page_source)\n",
    "    download_url = page_source_soup.xpath('//li[@class=\"btn-dlpdf\"]/a/@href')\n",
    "    if download_url:\n",
    "        return urljoin(HOST,download_url[0])\n",
    "    else:\n",
    "        return \"没有下载链接\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def downloader(title,href):\n",
    "    status_code = requests.get(href,headers=headers)\n",
    "    if status_code == 200:\n",
    "        contends = status_code.content\n",
    "        with open(f\"{title}.pdf\",\"wb\") as fp:\n",
    "            fp.wirte(contents)\n",
    "        print(f\"{title}\",\"成功下载\")\n",
    "    else:\n",
    "        print(\"无法下载\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_pages = 28\n",
    "\n",
    "\n",
    "title_list = []\n",
    "author_list = []\n",
    "href_list = []\n",
    "download_url_list = []\n",
    "\n",
    "def main():\n",
    "    now_page_count = 1\n",
    "    while now_page_count < num_pages:\n",
    "        html = driver.page_source\n",
    "        try:\n",
    "            soup = etree.HTML(html)\n",
    "            tr_list= soup.xpath('//div[@id=\"gridTable\"]/table/tbody/tr')\n",
    "            for tr in tr_list:\n",
    "                title = \"\".join(tr.xpath('td[@class=\"name\"]//text()')).strip().replace(\"\\n\",\"\").replace(\"                              网络首发\",\"\")\n",
    "                author = \",\".join(tr.xpath('td[@class=\"author\"]/a//text()')).strip().replace(\"\\n\",\"\")\n",
    "                href = tr.xpath('td[@class=\"name\"]/a/@href')[0]\n",
    "                href = get_data(href)\n",
    "                download_url = download(href)\n",
    "#                 print(download_url)\n",
    "                title_list.append(title)\n",
    "                author_list.append(author)\n",
    "                href_list.append(href)\n",
    "                download_url_list.append(download_url)\n",
    "            now_page_count = now_page_count + 1\n",
    "            driver.find_element_by_xpath('//*[@id=\"PageNext\"]').click()\n",
    "            time.sleep(0.5)\n",
    "        except:\n",
    "            time.sleep(10)# 解决500多遇到验证码问题\n",
    "    data = {\"标题\":title_list,\"作者\":author_list,\"链接\":href_list,\"下载链接\":download_url_list}\n",
    "    data = pd.DataFrame(data)\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "      <th>作者</th>\n",
       "      <th>链接</th>\n",
       "      <th>下载链接</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>人工智能价值网络下零售企业商业模式创新与企业效益相关性分析</td>\n",
       "      <td>郭漫勤,师佳英</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "      <td>https://kns.cnki.net/kcms/download.aspx?filena...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>大数据时代人工智能在计算机网络技术中的运用</td>\n",
       "      <td>任思颖</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "      <td>https://kns.cnki.net/kcms/download.aspx?filena...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>基于人工智能的网络空间安全防御战略研究</td>\n",
       "      <td>贾焰,方滨兴,李爱平,顾钊铨</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "      <td>https://kns.cnki.net/kcms/download.aspx?filena...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>人工智能赋能网络攻击的安全威胁及应对策略</td>\n",
       "      <td>方滨兴,时金桥,王忠儒,余伟强</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "      <td>https://kns.cnki.net/kcms/download.aspx?filena...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>基于专利技术共现网络的人工智能跨领域融合模式识别</td>\n",
       "      <td>陈钰芬,王科平</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "      <td>https://kns.cnki.net/kcms/download.aspx?filena...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1345</th>\n",
       "      <td>计算机网络与人工智能</td>\n",
       "      <td>吴洪森</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "      <td>https://kns.cnki.net/kcms/download.aspx?filena...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1346</th>\n",
       "      <td>基于人工智能的VoIP网络QoS专家系统的设计与实现</td>\n",
       "      <td>罗芳,张顺颐,王攀</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "      <td>https://kns.cnki.net/kcms/download.aspx?filena...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1347</th>\n",
       "      <td>基于网络和人工智能的图书馆信息管理系统研究</td>\n",
       "      <td>阳学军</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "      <td>https://kns.cnki.net/kcms/download.aspx?filena...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1348</th>\n",
       "      <td>简述人工智能技术在网络安全管理中的应用</td>\n",
       "      <td>马秀荣,王化宇</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "      <td>https://kns.cnki.net/kcms/download.aspx?filena...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1349</th>\n",
       "      <td>人工智能技术在计算机网络教育中的应用</td>\n",
       "      <td>苏新彦,陈三丽</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "      <td>https://kns.cnki.net/kcms/download.aspx?filena...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1350 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 标题               作者  \\\n",
       "0     人工智能价值网络下零售企业商业模式创新与企业效益相关性分析          郭漫勤,师佳英   \n",
       "1             大数据时代人工智能在计算机网络技术中的运用              任思颖   \n",
       "2               基于人工智能的网络空间安全防御战略研究   贾焰,方滨兴,李爱平,顾钊铨   \n",
       "3              人工智能赋能网络攻击的安全威胁及应对策略  方滨兴,时金桥,王忠儒,余伟强   \n",
       "4          基于专利技术共现网络的人工智能跨领域融合模式识别          陈钰芬,王科平   \n",
       "...                             ...              ...   \n",
       "1345                     计算机网络与人工智能              吴洪森   \n",
       "1346     基于人工智能的VoIP网络QoS专家系统的设计与实现        罗芳,张顺颐,王攀   \n",
       "1347          基于网络和人工智能的图书馆信息管理系统研究              阳学军   \n",
       "1348            简述人工智能技术在网络安全管理中的应用          马秀荣,王化宇   \n",
       "1349             人工智能技术在计算机网络教育中的应用          苏新彦,陈三丽   \n",
       "\n",
       "                                                     链接  \\\n",
       "0     https://kns.cnki.net/kcms/detail/detail.aspx?d...   \n",
       "1     https://kns.cnki.net/kcms/detail/detail.aspx?d...   \n",
       "2     https://kns.cnki.net/kcms/detail/detail.aspx?d...   \n",
       "3     https://kns.cnki.net/kcms/detail/detail.aspx?d...   \n",
       "4     https://kns.cnki.net/kcms/detail/detail.aspx?d...   \n",
       "...                                                 ...   \n",
       "1345  https://kns.cnki.net/kcms/detail/detail.aspx?d...   \n",
       "1346  https://kns.cnki.net/kcms/detail/detail.aspx?d...   \n",
       "1347  https://kns.cnki.net/kcms/detail/detail.aspx?d...   \n",
       "1348  https://kns.cnki.net/kcms/detail/detail.aspx?d...   \n",
       "1349  https://kns.cnki.net/kcms/detail/detail.aspx?d...   \n",
       "\n",
       "                                                   下载链接  \n",
       "0     https://kns.cnki.net/kcms/download.aspx?filena...  \n",
       "1     https://kns.cnki.net/kcms/download.aspx?filena...  \n",
       "2     https://kns.cnki.net/kcms/download.aspx?filena...  \n",
       "3     https://kns.cnki.net/kcms/download.aspx?filena...  \n",
       "4     https://kns.cnki.net/kcms/download.aspx?filena...  \n",
       "...                                                 ...  \n",
       "1345  https://kns.cnki.net/kcms/download.aspx?filena...  \n",
       "1346  https://kns.cnki.net/kcms/download.aspx?filena...  \n",
       "1347  https://kns.cnki.net/kcms/download.aspx?filena...  \n",
       "1348  https://kns.cnki.net/kcms/download.aspx?filena...  \n",
       "1349  https://kns.cnki.net/kcms/download.aspx?filena...  \n",
       "\n",
       "[1350 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "data = main()\n",
    "display(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.to_excel('CNKI_数据挖掘.xlsx',sheet_name=\"文章\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 下载为pdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_excel(\"CNKI_数据挖掘.xlsx\")\n",
    "base = \"https://kns.cnki.net/\"\n",
    "count = 450\n",
    "end_count = 500\n",
    "title_list = data[\"标题\"].to_list()[count:end_count]\n",
    "download_url_list = data[\"链接\"].to_list()[count:end_count]\n",
    "for title,download_url in zip(title_list,download_url_list):\n",
    "    try:\n",
    "        driver.get(download_url)\n",
    "        time.sleep(0.5)\n",
    "        js = 'document.getElementById(\"pdfDown\").setAttribute(\"target\",\"\");'\n",
    "        driver.execute_script(js)\n",
    "        time.sleep(0.5)\n",
    "        driver.find_element_by_xpath('//li[@class=\"btn-dlpdf\"]').click()\n",
    "        time.sleep(0.5)\n",
    "        source = driver.page_source\n",
    "        if \"安全验证\" in source: # 如果安全验证在html内，那么就需要调用api\n",
    "            print(\"已经进入验证码阶段\")\n",
    "            time.sleep(2)\n",
    "            url_path = re.findall('<dt><img id=\"vImg\" src=\"(.*?)\" alt=\"验证码\" title=\"点击切换验证码\"></dt>',source)[0] # 取出图片url\n",
    "            picture_url = urljoin(base,url_path)\n",
    "            content = requests.get(picture_url).content\n",
    "            picture_name = f\"./code/{uuid4()}\"\n",
    "            driver.get_screenshot_as_file(picture_name+\".jpg\")\n",
    "            cut_picture(picture_name)\n",
    "            result = base64_api(uname='#####省去', pwd='#####省去', img=picture_name+\".png\", typeid=3)\n",
    "            print(\"识别结果为：\",result) # 识别出的图片结果\n",
    "            driver.find_element_by_id('vcode').send_keys(result) # 将图片结果填入\n",
    "            time.sleep(5)\n",
    "            driver.find_element_by_xpath('/html/body/div/form/dl/dd/button').click() # 点击提交进行下载\n",
    "    except:\n",
    "        print(f'{title}----没有下载权限')\n",
    "        continue"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 导出refworks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from selenium.webdriver.common.action_chains import ActionChains\n",
    "move = driver.find_element_by_xpath('//*[@id=\"batchOpsBox\"]/li[2]/a')\n",
    "ActionChains(driver).move_to_element(move).perform()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "move = driver.find_element_by_xpath('//*[@id=\"batchOpsBox\"]/li[2]/ul/li[1]/a')\n",
    "ActionChains(driver).move_to_element(move).perform()\n",
    "driver.find_element_by_xpath('//*[@id=\"batchOpsBox\"]/li[2]/ul/li[1]/ul/li[8]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 切换窗口\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.switch_to_window(driver.window_handles[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导出\n",
    "element=driver.find_element_by_xpath('//*[@id=\"litotxt\"]/a')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('D:/CNKI/CNKI-20210630234110140.txt', encoding='utf-8') as f:\n",
    "    display(f.readlines())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
